/*
 
 Program readeneu.sas reads quarterly ENEU files 1-1987 to 2-2003. 

 Kumler, Verhoogen, Frias "Enlisting Employees ..." REStat forthcoming

*/


 
%include 'housekeeping.sas';

*********************************************************************************;
***************************** define macros *************************************;
*********************************************************************************;

******************************* main macro **************************************;
* Macro is invoked at bottom of code below;
%macro readeneu(firstyr, lastyr, ncities);
    /*
     Driver macro. For every year from firstyr to lastyr and every quarter
     1 to 4 it
       1. copies the gzipped DBF file from the ENEU archive directory to the
          tmp directory and unzips it there,
       2. imports the DBF into work.temp,
       3. calls the reader and harmonizer macros that match the
          questionnaire in force for that quarter (they produce temp2 and
          temp3),
       4. stores the result as work.eneu<quarter><year>,
     and then stacks the quarterly data sets of the year into
     working.eneu<year>_web.

     Parameters
       firstyr  first survey year, four digits
       lastyr   last survey year, four digits
       ncities  not referenced anywhere in this macro; kept so that
                existing calls keep working

     Names used but not defined here: macro variables eneu and tmp, libref
     working, macro listqtr (presumably supplied by housekeeping.sas --
     confirm).

     File layout implied by the code
       years up to 1999     <eneu>/eneuYY/may-QYY.dbf.gz
       2000, quarter 1      <eneu>/eneu0Y/may-Q0Y.dbf.gz
       2000 quarter 2 on    <eneu>/eneu0Y/eneuQ0Y.dbf.gz
    */

    /* keep the working variables out of any enclosing or global scope */
    %local yr trimestre firstqr lastqr yr2 srcdir stem;

    %do yr = &firstyr %to &lastyr;
        %let firstqr = 1;
        %let lastqr = 4;
        %do trimestre = &firstqr %to &lastqr;

            /* archive subdirectory and file stem, worked out once */
            %if &yr. <= 1999 %then %do;
                %let yr2 = %eval(&yr. - 1900);
                %let srcdir = eneu&yr2.;
                %let stem = may-&trimestre.&yr2.;
            %end;
            %else %do;
                %let yr2 = %eval(&yr. - 2000);
                %let srcdir = eneu0&yr2.;
                %if &yr. = 2000 and &trimestre. = 1 %then %let stem = may-&trimestre.0&yr2.;
                %else %let stem = eneu&trimestre.0&yr2.;
            %end;

            *********** unzip ENEU file to tmp directory ******************;
            x "cp -f &eneu./&srcdir./&stem..dbf.gz &tmp.";
            x "chmod 770 &tmp./&stem..dbf.gz";
            x "gunzip -df &tmp./&stem..dbf.gz";

            ***************** read in data *******************************;
            proc import datafile="&tmp./&stem..dbf" out=temp replace;
            run;

            *** rename fields and make consistent variable definitions;
            *** 3-89 questionnaire through 2-1994, 3-94 questionnaire after;
            %if &yr. <= 1993 or (&yr. = 1994 and &trimestre. <= 2) %then %do;
                %readin19871994(&yr,&trimestre);
                %harmonize19871994(&yr,&trimestre);
            %end;
            %else %do;
                %readin19942004(&yr,&trimestre);
                %harmonize19942004(&yr,&trimestre);
            %end;

            **** Sample Selection and data cleaning ********;

            data eneu&trimestre.&yr.;
                set temp3;

            run;

            proc datasets nolist;
                delete temp temp2 temp3;
            run;
            quit;

        %end;

        *** Merge quarterly files into annual dataset;

        data working.eneu&yr._web;
            set %listqtr(&firstqr.,&lastqr.);
        run;

        proc datasets;
            delete %listqtr(&firstqr.,&lastqr.);
        run;
        quit;

    %end;

%mend readeneu;


******* macro to input data from 1987-1994 based on 3-89 questionnaire ******;
%macro readin19871994(yr, trimestre);
    /*
     Builds work.temp2 from work.temp (one imported quarterly file coded
     with the 3-89 questionnaire).
       - renames the raw DBF fields to the project names; coded answers get
         a temporary suffix 1 (for example P1A1 becomes p1a1_891)
       - concatenates the split identifier fields into estrato, distsem and
         control
       - creates numeric versions of the coded answers (suffix _89) and
         drops the temporary raw copies

     Parameters yr and trimestre are accepted so that all reader macros
     share one call signature; they are not referenced in the body.
    */

    data temp2;
        set temp(rename=(MUN=municipi A_MET=estrato_a
            EST=estrato_b AGEB=ageb
            A_LIS=area MAN=manzana R_TRH=trh
            PAN=distsem_a D_SEM=distsem_b ENT=control_a
            CON=control_b V_SEL=numviv HOG=hogar
            H_MUD=hogarmud PER=per N_ENT=numentre T_REG=tipo1
            SEX=sexo EDA=edad E_CIV=estcivil ESC=esco FAC=factor1
            P1A1=p1a1_891 P1A3=p1a3_891 P1A4=p1a4_891 P1B=p1b_891 P1C=p1c_891
            P2F_2=p2f_891 P3A=p3a_891 P3B=p3b_891 P3C=p3c_891 P3D=p3d_891
            P4=p4_891 P5=p5_891 P5B=p5b_89 P6=p6_891 P6_1=p6_1_891
            P7=p7_891 P7A=p7a_891 P7B=p7b_891 P7C=p7c_891 P7D_1=p7d1_891
            P7D_2=p7d2_891 P7D_3=p7d3_891 P7D_4=p7d4_891 P7D_5=p7d5_891
            P7D_6=p7d6_891 P7D_7=p7d7_891 P7D_8=p7d8_891 P7D_9=p7d9_891 P8=p8_891 PAR=par1));

        ** concatenate the two-part identifiers;
        estrato = estrato_a||estrato_b;
        distsem = distsem_a||distsem_b;
        control = control_a||control_b;

        ** set strings to blanks;
        ** NOTE(review): the first statement compares against a blank string;
        ** but assigns a numeric missing value -- its effect depends on the;
        ** type PROC IMPORT gave MUN, confirm;

        if municipi=' ' then municipi=.;
        if p7a_891='A' then p7a_891='';

        ** set variables as numerical values;
        ** adding 0 forces a numeric result whatever type the import produced;
        par=par1+0;
        tipo=tipo1+0;
        factor=factor1+0;
        p1a1_89=p1a1_891+0;
        p1b_89=p1b_891+0;
        p1c_89=p1c_891+0;
        p1a4_89=p1a4_891+0;
        p1a3_89=p1a3_891+0;
        p2f_89=p2f_891+0;
        p3a_89=p3a_891+0;
        p3b_89=p3b_891+0;
        p3c_89=p3c_891+0;
        p3d_89=p3d_891+0;
        p4_89=p4_891+0;
        p5_89=p5_891+0;
        p6_89=p6_891+0;
        p6_1_89=p6_1_891+0;
        p7_89=p7_891+0;
        p7a_89=p7a_891+0;
        p7b_89=p7b_891+0;
        p7c_89=p7c_891+0;
        p7d1_89=p7d1_891+0;
        p7d2_89=p7d2_891+0;
        p7d3_89=p7d3_891+0;
        p7d4_89=p7d4_891+0;
        p7d5_89=p7d5_891+0;
        p7d6_89=p7d6_891+0;
        p7d7_89=p7d7_891+0;
        p7d8_89=p7d8_891+0;
        p7d9_89=p7d9_891+0;
        p8_89=p8_891+0;

        ** drop every temporary raw copy, p1c_891 included;
        drop par1 tipo1 factor1 p1a1_891 p1b_891 p1c_891 p1a3_891 p1a4_891 p2f_891
            p3a_891 p3b_891 p3c_891 p3d_891 p4_891
            p5_891 p6_1_891 p6_891 p7_891 p7a_891
            p7b_891 p7c_891 p7d1_891 p7d2_891 p7d3_891 p7d4_891 p7d5_891 p7d6_891
            p7d7_891 p7d8_891 p7d9_891 p8_891;

    run;
    %mend readin19871994;

******* macro to input data from 3-1994 onward (including the 2000 and later files) based on 3-94 questionnaire ******;
%macro readin19942004(yr, trimestre);
    /*
     Builds work.temp2 from work.temp (one imported quarterly file coded
     with the 3-94 questionnaire).
       - renames the raw DBF fields to the project names; coded answers get
         a temporary suffix 1 (for example P1A1 becomes p1a1_941)
       - concatenates the split identifier fields into estrato, distsem and
         control
       - creates numeric versions of the coded answers (suffix _94) and
         drops the temporary raw copies

     Parameters yr and trimestre are accepted so that all reader macros
     share one call signature; they are not referenced in the body.
    */

    data temp2;
        set temp(rename=(MUN=municipi
            A_MET=estrato_a EST=estrato_b AGEB=ageb
            A_LIS=area MAN=manzana PAN=distsem_a D_SEM=distsem_b
            ENT=control_a CON=control_b V_SEL=numviv HOG=hogar
            H_MUD=hogarmud PER=per N_ENT=numentre T_REG=tipo1 R_TRH=trh
            SEX=sexo EDA=edad E_CIV=estcivil ESC=esco FAC=factor1
            P1A1=p1a1_941 P1A3=p1a3_941 P1A4=p1a4_941 P1B=p1b_941 P1D=p1d_941
            P2F_2=p2f_941 P3=p3_941 P3A=p3a_941 P3B=p3b_941 P3C=p3c_941 P3D=p3d_941
            P3E=p3e_941 P4=p4_941 P5=p5_941 P5B=p5b_94 P6_1=p6_1_941
            P6_2=p6_2_941 P6A=p6a_941 P7=p7_941 P7A_1=p7a_1_941
            P7A_2=p7a_2_941 P7B=p7b_941 P7C=p7c_941 P7D_1=p7d1_941 P7D_2=p7d2_941
            P7D_3=p7d3_941 P7D_4=p7d4_941 P7D_5=p7d5_941 P7D_6=p7d6_941 P7D_7=p7d7_941
            P7D_8=p7d8_941 P7D_9=p7d9_941 P8A=p8a_941 PAR=par1));

        ** concatenate the two-part identifiers;
        estrato = estrato_a||estrato_b;
        distsem = distsem_a||distsem_b;
        control = control_a||control_b;

        ** set strings to blanks;
        ** NOTE(review): the first statement compares against a blank string;
        ** but assigns a numeric missing value -- its effect depends on the;
        ** type PROC IMPORT gave MUN, confirm;

        if municipi=' ' then municipi=.;
        if p7a_1_941='A' then p7a_1_941='';

        ** set variables as numerical values;
        ** adding 0 forces a numeric result whatever type the import produced;
        par=par1+0;
        factor=factor1+0;
        tipo=tipo1+0;
        p1a1_94=p1a1_941+0;
        p1b_94=p1b_941+0;
        p1a3_94=p1a3_941+0;
        p1a4_94=p1a4_941+0;
        p1d_94=p1d_941+0;
        p2f_94=p2f_941+0;
        p3_94=p3_941+0;
        p3a_94=p3a_941+0;
        p3b_94=p3b_941+0;
        p3c_94=p3c_941+0;
        p3d_94=p3d_941+0;
        p3e_94=p3e_941+0;
        p4_94=p4_941+0;
        p5_94=p5_941+0;
        p6_1_94=p6_1_941+0;
        p6_2_94=p6_2_941+0;
        p6a_94=p6a_941+0;
        p7_94=p7_941+0;
        p7a_1_94=p7a_1_941+0;
        p7a_2_94=p7a_2_941+0;
        p7b_94=p7b_941+0;
        p7c_94=p7c_941+0;
        p7d1_94=p7d1_941+0;
        p7d2_94=p7d2_941+0;
        p7d3_94=p7d3_941+0;
        p7d4_94=p7d4_941+0;
        p7d5_94=p7d5_941+0;
        p7d6_94=p7d6_941+0;
        p7d7_94=p7d7_941+0;
        p7d8_94=p7d8_941+0;
        p7d9_94=p7d9_941+0;
        p8a_94=p8a_941+0;

        ** drop every temporary raw copy, tipo1 and p1a3_941 included;
        drop par1 tipo1 factor1 p1a1_941 p1b_941 p1a3_941 p1a4_941 p1d_941
            p2f_941 p3_941 p3a_941 p3b_941 p3c_941 p3d_941 p3e_941 p4_941
            p5_941  p6_1_941 p6_2_941 p6a_941 p7_941 p7a_1_941 p7a_2_941
            p7b_941 p7c_941 p7d1_941 p7d2_941 p7d3_941 p7d4_941 p7d5_941 p7d6_941
            p7d7_941 p7d8_941 p7d9_941 p8a_941;

    run;

    %mend readin19942004;

******* macro to harmonize data based on 3-89 questionnaire **********;

%macro harmonize19871994(yr, trimestre);
    /*
     Builds work.temp3 from work.temp2 for quarters coded with the 3-89
     questionnaire. Derives demographic, employment, wage, schooling,
     occupation and firm-size variables, renames selected questionnaire
     items to descriptive names, attaches labels, and stamps each record
     with the survey year and quarter.

     Parameters
       yr         survey year; written to periodo, and in 1987 the monthly
                  wage is divided by 1000
       trimestre  survey quarter; written to the data set variable trimestre

     RENAME statements take effect only when temp3 is written, so the rest
     of the step keeps using the _89 names.
    */
    data temp3;
        set temp2;

        **** Create new variables, w/ consistent coding in 1989 and 1994;

        if sexo in (1,2) then male = 2-sexo; else male=.;
        if edad in (98,99) then age=.; else age=edad;

        * married variable -- could exploit more
        * information here;

        if estcivil = 2 then married=1;
        else if estcivil in (1,3,4,5,6) then married=0;
        else married=.;

        *working variable;
        if p1a1_89 = 1 then work_lwk=1;
        else work_lwk=0;

        *descriptive names for selected items;
        *last_job is time since last job for unemployed;
        rename p2f_89 = last_job;
        rename p3a_89 = employee_type;
        rename p3c_89 = coworkers;
        rename p1a1_89 = p1a1;
        rename p6_89 = hrs_worked;
        rename p6_1_89 = hrs_worked_cat;
        rename p7_89 = pay_type;
        rename p1a3_89 = p1a3;

        *construct benefit/imss variables;
        *p7d5_89 is renamed to imss_d once, here;
        rename p7d1_89 = aguinaldo_d;
        rename p7d2_89 = utilidades_d;
        rename p7d3_89 = vacaciones_d;
        rename p7d4_89 = credito_d;
        rename p7d5_89 = imss_d;
        rename p7d6_89 = medico_d;
        rename p7d7_89 = issste_d;
        rename p7d8_89 = segurovol_d;
        rename p7d9_89 = otherben_d;

        *indicator for workers without pay;
        work_nopay=0;
        if p1a4_89 in(11,12) then work_nopay=1;

        *indicator for workers who did not work due to temporary absence;
        *that was a legitimate reason;
        temp_abs=0;
        if p1b_89 =1 then temp_abs=1;

        *indicator for starting new job;
        new_job=0;
        if p1c_89 in(1,2) then new_job=1;

        *indicator for worked within last yr;
        work_lyr = 0;
        if p2f_89 in(1,2,3,4,5) then work_lyr=1;

        * full-time variable -- double-check;
        * flag marks records classed as not full-time that report 35 or more hours;

        if p1a1_89=1 and p6_1_89 in (2,3) then fulltime=1;
        else if p1a1_89=2 or p6_1_89 in (1,4) then fulltime=0;
        else fulltime=.;
        if fulltime=0 and p6_89 >= 35 then flag=1;

        *individuals with full-time jobs, work w/o pay, temporarily
        *not working, will start a new job in less than 4 wks, or
        *had a job in the last year are included in section 3;

        include_3 = .;
        if p1a1_89 = 1 or p1a4_89 in(11,12) or p1b_89=1
         or p1c_89 in(1,2) then include_3=1;
        include_3a = .;
        if p1a1_89 = 1 or p1a4_89 in(11,12) or p1b_89=1
         or p1c_89 in(1,2) or p2f_89 in(1,2,3,4,5) then include_3a=1;

        rama = substr(p5b_89,1,2);
        subgrupo_cae89=p5b_89+0; * refers to cae in use from 1-87 to 2-94;
        city = substr(estrato,1,2)+0;
        state = substr(control,1,2)+0;

        *** construct hourly wage variable;

        * Note: We have required that full time workers have worked the
        * previous week. In 1994, we could have defined time using
        * hours regularly worked rather than hours worked in the
        * previous week. We did not in order to stay consistent w/ 1989
        * survey sample;

        * Note: the questionnaire appears to ask for wages for various
        * periods, but the critico-codificador is supposed to convert
        * this to monthly wages;

        mnthwage = p7a_89;
        if mnthwage = 999999 then mnthwage=.; * recode missing values;
        if mnthwage = 999998 then mnthwage=999998*1.5; * assign top-coded values 1.5*topcode;
        if &yr.=1987 then mnthwage=mnthwage/1000; * express 1987 monthly wage in new pesos;

        * monthly hours are weekly hours times 4.3 and codes 96-98 map to fixed weekly hours;
        if p6_89 > 0 and p6_89<=94 then mnthhour = p6_89*4.3;
        else if p6_89=96 then mnthhour=20*4.3;
        else if p6_89=97 then mnthhour=42*4.3;
        else if p6_89=98 then mnthhour=60*4.3;
        else mnthhour = .;
        if mnthhour ^= . then hourwage = mnthwage/mnthhour;

        * daily wage;
        dailywage = hourwage*8;

        * wage relative to the minimum wage -- filled only when the monthly wage;
        * itself is coded 999999, from the bracket answers p7b and p7c;
        relate_mw = .;
        if p7a_89 = 999999 then do;
            if p7b_89 = 1 then relate_mw = 1;
            if p7b_89 = 2 | p7b_89 = 3 then do;
                if p7c_89 = 1 then relate_mw = 0.1;
                if p7c_89 = 2 then relate_mw = 0.375;
                if p7c_89 = 3 then relate_mw = 0.75;
                if p7c_89 = 4 then relate_mw = 1.5;
                if p7c_89 = 5 then relate_mw = 2.5;
                if p7c_89 = 6 then relate_mw = 4;
                if p7c_89 = 7 then relate_mw = 7.5;
                if p7c_89 = 8 then relate_mw = 11;
                if p7c_89 = 9 then relate_mw = 99;
                end;
            end;

      *** create new schooling variable;

      * These codes assigned on the basis of the catalogo de
      * escolaridad and the Catalogo de Codificacion de la TRH, both
      * in esco8794.doc;

      * For escolaridad in 1989, T stands for estudios terminados, N
      * for non-terminados, 9 if unknown whether estudios are
      * terminados;

      * Carrera (occupation) is used to determine schooling when
      * direct info not available;

        esco_yrs = substr(esco,1,1); *extract first character of esco;
        esco_niv = substr(esco,2,1); *extract second character of esco;
        esco_occ = substr(esco,3,3); *extract 3-5 characters of esco;
        if esco_occ = '000' then do; *direct info on schooling available;
            if esco_niv='1' then do;
                if esco_yrs in ('1','2','3','4','5','6','7','8') then esco1 = 0+esco_yrs;
                else if esco_yrs='9' then esco1=3;
                else if esco_yrs='T' then esco1=8;
                else if esco_yrs='N' then esco1=7;
                end;
            else if esco_niv='2' then do;
                if esco_yrs in ('1','2','3','4','5','6','7','8') then esco1 = 6+esco_yrs;
                else if esco_yrs='9' then esco1=7;
                else if esco_yrs='T' then esco1=11;
                else if esco_yrs='N' then esco1=10;
                end;
            else if esco_niv='3' then do;
                if esco_yrs in ('1','2','3','4','5','6','7','8') then esco1 = 9+esco_yrs;
                else if esco_yrs='9' then esco1=10;
                else if esco_yrs='T' then esco1=14;
                else if esco_yrs='N' then esco1=13;
                end;
            else if esco_niv='4' then do;
                if esco_yrs in ('1','2','3','4','5','6','7','8') then esco1 = 12+esco_yrs;
                else if esco_yrs='9' then esco1=14;
                else if esco_yrs='T' then esco1=16;
                else if esco_yrs='N' then esco1=14;
                end;
            else if esco_niv='5' then do;
                if esco_yrs in ('1','2','3','4','5','6','7','8') then esco1 = 16+esco_yrs;
                else if esco_yrs='9' then esco1=18;
                else if esco_yrs='T' then esco1=20;
                else if esco_yrs='N' then esco1=18;
                end;
            else do;
                if esco = '99000' then esco1=.;
                else if esco = '98000' then esco1=0; *assign 0 if illiterate w/ no other report;
                else if esco = '97000' then esco1=3; *assign 3 if literate w/ no other report;
                end;
            end;
        else if esco_occ ne '000' then do;

        * Note that there is an issue in ENEU schooling codes: we do
        * not know if vocational training was concurrent with regular
        * school or came after graduation.;

        * Description in icesc.doc for later period suggests the
        * following: N and T used for vocational training after
        * graduation for primaria, secundaria, prepa, used for
        * simultaneous training at level of licenciatura/maestria+;

            if esco_yrs in ('T') then do;
                if esco_niv = '1' then esco1=8;
                else if esco_niv = '2' then esco1=11;
                else if esco_niv='3' then esco1=14;
                else if esco_niv='4' then esco1=16;
                else if esco_niv in ('5','6','7','8') then esco1=20;
                end;
            else if esco_yrs='N' then do;
                if esco_niv = '1' then esco1=7;
                else if esco_niv = '2' then esco1=10;
                else if esco_niv = '3' then esco1=13;
                else if esco_niv = '4' then esco1=14;
                else if esco_niv in ('5','6','7','8') then esco1=18;
                end;
            else do; *does not have T or N in first position;
                * NOTE(review): a non-numeric esco_yrs converts to missing here and;
                * missing compares as <= 8, so esco1 ends up missing -- confirm intended;
                if esco_niv='4' then do;
                    if esco_yrs+0<=8 then esco1=12+esco_yrs;
                    else esco1 = 14; * if esco_yrs = 9 then number of years is not known;
                    end;
                else if esco_niv='5' then do;
                    if esco_yrs+0<=8 then esco1=16+esco_yrs;
                    else esco1=18;
                    end;
                else do;

                    * vocational where interviewer could not determine
                    * whether respondent completed vocational training
                    * or not;

                    * Note: assume that vocational training was not
                    * finished;

                    if esco_niv='1' then esco1=7;
                    else if esco_niv='2' then esco1=10;
                    else if esco_niv='3' then esco1=13;
                    end;
                end;
            end;

      *** Create occupation variable;

      * Note: Occupation codes defined in 1980, changed in 1990 and
      * 1992 -- see crosswalks in appendices to vol. 1 of CMO-96;

      * floor(p4_89/100) is the leading two digits and floor(p4_89/10) the;
      * leading three digits of the occupation code;

        if floor(p4_89/100)=11 then occ=1;
        else if floor(p4_89/100)=12 then occ=2;
        else if floor(p4_89/100)=13 then occ=3;
        else if floor(p4_89/100)=14 then occ=4;
        else if floor(p4_89/100) in (21,22,31) and floor(p4_89/10)^=220 then occ=5;
        else if floor(p4_89/100) in (41,42) or floor(p4_89/10) in (431,439,526) then occ=6;
        else if floor(p4_89/100)=51 then occ=7;
        else if floor(p4_89/10) in (520,522,523,527) then occ=8;
        else if floor(p4_89/10) in (521,524,525) then occ=9;
        else if floor(p4_89/10)=529 or floor(p4_89/100)=53 then occ=10;
        else if floor(p4_89/10) =430 or floor(p4_89/100)=83 then occ=11;
        else if floor(p4_89/10)=610 then occ=12;
        else if floor(p4_89/10) in (612,613,614,615,616,617,618,619) then occ=13;
        else if floor(p4_89/100)=71 then occ=14;
        else if floor(p4_89/100)=72 then occ=15;
        else if floor(p4_89/100)=81 and floor(p4_89/10)^=812 then occ=16;
        else if floor(p4_89/100)=82 then occ=17;
        else if floor(p4_89/100)=84 then occ=18;
        else if floor(p4_89/100)=99 then occ=19;
        else if floor(p4_89/10)=611 then occ=20;
        else if floor(p4_89/10)=812 then occ=21;
        else if floor(p4_89/10)=220 then occ=22;

        *construct firm size variable;
        *taken from p3b when p3a is 1-3 and from p3d when p3a is 4-9;
        firmsize89=.;
        if p3a_89 in (1, 2, 3) then firmsize89=p3b_89;
        if 4<=p3a_89<=9 then firmsize89=p3d_89;

        label
            municipi = 'municipio [municipality]'
            estrato = 'estrato [stratum]'
            ageb = 'Area Geoestadistica Basica'
            area = 'area de listado [listing area]'
            manzana = 'manzana [block]'
            distsem = 'distribucion semanal [weekly dist.]'
            control = 'control [??]'
            numviv = 'numero de vivienda [dwelling no.]'
            hogar = 'hogar [household]'
            hogarmud = 'hogar mudado [household moved]'
            per = 'periodo [period]'
            tipo = 'tipo de registros [type of record]'
            sexo = 'sexo [sex]'
            edad = 'edad [age]'
            estcivil = 'estado civil [marital status]'
            esco = 'escolaridad [schooling]'
            hourwage = 'hourly wage'
            mnthwage = 'monthly wage'
            mnthhour = 'monthly hours'
            male = 'male'
            married = 'married'
            age = 'age'
            esco1 = 'schooling recode'
            occ = 'occupation'
            subgrupo_cae89 = 'industry, 4-digit'
            rama = 'industry, 2-digit'
            city = 'city, 2-digit'
            state = 'state, 2-digit'
            factor = 'weight'
            fulltime = 'full-time worker'
            trimestre = 'quarter'
            periodo = 'year'
            eneu = 'eneu data indicator'
            work_lwk = 'worked last week'
            work_nopay = 'worked in non-paying job'
            temp_abs = 'temporary absence from work last wk'
            new_job = 'starting next job <4 wks'
            p2f_89 = 'months since last job'
            p3a_89 = 'type of employee'
            p3c_89 = 'type of coworkers'
            work_lyr = 'worked within last year'
            p6_89 = 'total hrs worked'
            p6_1_89 = 'hrs worked, categories'
            p7_89 = 'paycheck type'
            p7d1_89 = 'aguinaldo [detailed]'
            p7d2_89 = 'utilidades [detailed]'
            p7d3_89 = 'vacaciones [detailed]'
            p7d4_89 = 'credito [detailed]'
            p7d5_89 = 'imss [detailed]'
            p7d6_89 = 'seguro medico [detailed]'
            p7d7_89 = 'issste [detailed]'
            p7d8_89 = 'seguro social voluntario [detailed]'
            p7d9_89 = 'otro [detailed]'
            flag = 'hours worked mismatch'
            include_3 = 'asked questions in part 3'
            include_3a = 'asked questions in part 3a'
            firmsize89 = 'firm size [1989]'
            ;

        * stamp source, survey year and quarter on every record;
        eneu=1;
        periodo=&yr.;
        trimestre = &trimestre.;

    run;
    %mend harmonize19871994;


******* macro to harmonize 1994-2004 data based on 3-94 questionnaire **********;

%macro harmonize19942004(yr, trimestre);
    data temp3;
    	set temp2;
        
        *Create new variables, w/ consistent coding in 1989 and 1994;
        
        if sexo in (1,2) then male = 2-sexo; else male=.;
        if edad in (98,99) then age=.; else age=edad;
        
        *married variable;
        
        if estcivil = 2 then married=1;
        else if estcivil in (1,3,4,5,6) then married=0;
        else married=.;
        
        *working variable;
        if p1a1_94 = 1 then work_lwk=1;
        else work_lwk=0;
        
        *last job variable -- time since last job for unemployed;
        *number of jobs -- number of jobs currently working;
        *coworkers type variable;
        *contract type variable;
        *p1a1_94 - worked last wk, remove yr;
        *hours worked variable;
        *hours worked categories;
        
        rename p2f_94 = last_job;
        rename p3_94 = num_jobs;
        rename p3c_94 = coworkers;
        rename p3d_94 = contract_type;
        rename p1a1_94 = p1a1;
        rename p6_1_94 = hrs_worked;
        rename p6_2_94 = hrs_worked_reg;
        rename p6a_94 = hrs_worked_cat;
        rename p7_94 = pay_type;
        rename p7d4_94 = imss_d;
        rename p1a3_94 = p1a3;
        
        *construct benefit/imss variables;
        rename p7d1_94 = aguinaldo_d;
        rename p7d2_94 = vacaciones_d;
        rename p7d3_94 = utilidades_d;
        rename p7d4_94 = imss_d;
        rename p7d5_94 = issste_d;
        rename p7d6_94 = sar_d;
        rename p7d7_94 = credito_d;
        rename p7d8_94 = medico_d;
        rename p7d9_94 = otherben_d;
        
        
        *employee type variable based on 1989 definition, which 
	*differs from 1994;
        employee_type = .;
        if p3a_94=1 then employee_type = 1;
        if p3a_94=2 then employee_type = 3;
        if p3a_94=3 then employee_type = 6;
        if p3a_94=4 then employee_type = 5;
        if p3a_94=5 then employee_type = 4;
        if p3a_94=6 then employee_type = 8;
        if p3a_94=7 then employee_type = 7;
        if p3a_94=9 then employee_type = 9;
		
        *indicator for workers without pay;
        work_nopay=0;
        if p1a4_94 in(11,12) then work_nopay=1;
        
        *indicator for workers who did not work due to temporary absence;
        *that was a legitimate reason;
        temp_abs=0;
        if p1b_94 in(1,2,3) then temp_abs=1;
		
	*indicator for starting new job;
        new_job=0;
        if p1d_94 in(1,2) then new_job=1;
		
	*indicator for worked within last yr;
        work_lyr = 0;
        if p2f_94 in(1,2,3,4,5) then work_lyr=1;
		
        *full-time variable -- double-check;
        
        if p1a1_94 = 1 and p6a_94 in (2,3) then fulltime=1;
        else if p1a1_94=2 or p6a_94 in (1,4) then fulltime=0;
        else fulltime=.;
        if fulltime=0 and p6_1_94 >= 35 then flag1=1;
        
        *individuals with full-time jobs, work w/o pay, temporarily
        *not working, will start a new job in less than 4 wks, or
        *had a job in the last year are included in section 3;
        include_3 = .;
        if p1a1_94 = 1 or p1a4_94 in(11,12) or p1b_94 in(1,2,3)
            or p1d_94 in(1,2) then include_3=1;
        include_3a = .;
        if p1a1_94 = 1 or p1a4_94 in(11,12) or p1b_94 in(1,2,3)
            or p1d_94 in(1,2) or p2f_94 in(1,2,3,4,5) then include_3a=1;
        
        
        rama = substr(p5b_94,1,2);
        subgrupo_cae94=p5b_94+0; *refers to cae in use from 3-94 to 4-99 (and possibly later);
        city = substr(estrato,1,2)+0;
        state = substr(control,1,2)+0;
        
        *** construct hourly wage variable;
        
        * Note: the questionnaire appears to ask for wages for various
        * periods, but the critico-codificador is supposed to convert
        * this to monthly wages;
        
        mnthwage = p7a_2_94;
        if mnthwage = 999999 then mnthwage=.;
        if mnthwage = 999998 then mnthwage=999998*1.5; * assign top-coded values 1.5*topcode;
        
        * hours;
        
        if (p6_1_94 > 0 and p6_1_94 <= 94) or p6_1_94 in (96,97,98) then do;
            if (p6_1_94 > 0 and p6_1_94 <= 94) then mnthhour=p6_1_94*4.3; 
            else if p6_1_94=96 then mnthhour=20*4.3;
            else if p6_1_94=97 then mnthhour=42*4.3;
            else if p6_1_94=98 then mnthhour=60*4.3; 
            end;     
        else mnthhour=.;
        
        * hourly wage;
        
        if mnthhour ^= . then hourwage = mnthwage/mnthhour;

	* daily wage;
        dailywage = hourwage*8;
        
        relate_mw = .;
        if p7a_2_94 = 999999 then do;
            if p7b_94 = 1 then relate_mw = 0.1;
            if p7b_94 = 2 then relate_mw = 1;
            if p7b_94 = 3 then do;
                if p7c_94 = 1 then relate_mw = 1.5;
                if p7c_94 = 2 then relate_mw = 2.5;
                if p7c_94 = 3 then relate_mw = 4;
                if p7c_94 = 4 then relate_mw = 7.5;
                if p7c_94 = 5 then relate_mw = 15;
                if p7c_94 = 6 then relate_mw = 21;
                if p7c_94 = 9 then relate_mw = 99;
                end;
            end;

        
      *** create new schooling variable

      * These codes assigned on the basis of the Instructivo de
      * Codificacion de Escolaridad, in icesc.doc;

      * For escolaridad in 1989, T stands for estudios terminados, N
      * for non-terminados, 9 if unknown whether estudios are
      * terminados.;

      * Carrera (occupation) is used to determine schooling when
      * direct info not available;
                    
        esco_niv = substr(esco,1,1);
        esco_yrs = substr(esco,2,1);
        esco_occ = substr(esco,3,3);
        if esco_occ = '000' then do; *direct info on schooling available;
            if esco_niv='1' then do;
                if esco_yrs in ('1','2','3','4','5','6') then esco1 = 0+esco_yrs;
                
                * indicates carrera tecnica after primaria with
                * unknown number of years (I do not understand this
                * note to myself. Is it referring to 7? To T?;
                
                else if esco_yrs='7' then esco1=7;  
                else if esco_yrs='9' then esco1=3;
                else if esco_yrs='T' then esco1=8;
                else if esco_yrs='N' then esco1=7;
                end; 
            else if esco_niv='2' then do;
                if esco_yrs in ('1','2','3','4','5','6','7','8') then esco1 = 6+esco_yrs;
                else if esco_yrs='9' then esco1=7;
                else if esco_yrs='T' then esco1=11;
                else if esco_yrs='N' then esco1=10;
                end;
            else if esco_niv='3' then do;
                if esco_yrs in ('1','2','3','4','5','6','7','8') then esco1 = 9+esco_yrs;
                else if esco_yrs='9' then esco1=10;
                else if esco_yrs='T' then esco1=14;
                else if esco_yrs='N' then esco1=13;
                end;
            else if esco_niv='4' then do;
                if esco_yrs in ('1','2','3','4','5','6','7','8') then esco1 = 12+esco_yrs;
                else if esco_yrs='9' then esco1=14;
                else if esco_yrs='T' then esco1=16;
                else if esco_yrs='N' then esco1=14;
                end;
            else if esco_niv='5' then do;
                if esco_yrs in ('1','2','3','4','5','6','7','8') then esco1 = 16+esco_yrs;
                else if esco_yrs='9' then esco1=18;
                else if esco_yrs='T' then esco1=20;
                else if esco_yrs='N' then esco1=18;
                end; 
            else do;
                if esco = '98000' then esco1=0; *assign 0 if illiterate w/ no other report;
                else if esco = '97000' then esco1=3; *assign 3 if literate w/ no other report;
                else if esco = '96000' then esco1=3; *assign 3 if in adult school but not finished;
                else if esco = '99000' then esco1=.;
                end;
            end;
        else if esco_occ ne '000' then do;
            
        * Note that there is an issue in ENEU schooling codes: we do
        * not know if vocational training was concurrent with regular
        * school or came after graduation.;
                        
        * Description in icesc.doc suggests the following: N and T
        * used for vocational training after graduation for primaria,
        * secundaria, prepa, used for simultaneous training at level
        * of licenciatura/maestria+;
                        
            if esco_yrs in ('T') then do;
                if esco_niv = '1' then esco1=8;
                else if esco_niv = '2' then esco1=11;
                else if esco_niv='3' then esco1=14;
                else if esco_niv='4' then esco1=16;
                else if esco_niv in ('5','6','7','8') then esco1=20;
                end;
            else if esco_yrs='N' then do;
                if esco_niv = '1' then esco1=7;
                else if esco_niv = '2' then esco1=10;
                else if esco_niv = '3' then esco1=13;
                else if esco_niv = '4' then esco1=14;
                else if esco_niv in ('5','6','7','8') then esco1=18;
                end;
            else do; *does not have T or N in second position;
                if esco_niv='4' then do;
                    if esco_yrs in ('1','2','3','4','5','6','7','8') then esco1=12+esco_yrs;
                    else esco1 = 14; * if esco_yrs = 9 then number of years is not known;
                    end;
                else if esco_niv='5' then do;
                    if esco_yrs in ('1','2','3','4','5','6','7','8')  then esco1=16+esco_yrs;
                    else esco1=18;
                    end;
                else do;
                    
                * vocational where interviewer could not determine
                * whether respondent completed vocational training or
                * not;
                    
                * Note: assume that vocational training was not
                * finished;
                    
                    if esco_niv='1' then esco1=7;
                    else if esco_niv='2' then esco1=10;
                    else if esco_niv='3' then esco1=13;
                    end;
                end;
            end;
        
      *** Create occupation variable (following Pablos code);
        
      * Note: Occupation codes defined in 1980, changed in 1990 and
      * 1992 -- see crosswalks in appendices to vol. 1 of CMO-96;
               
        if floor(p4_94/100)=11 then occ=1;
        else if floor(p4_94/100)=12 then occ=2;
        else if floor(p4_94/100)=13 then occ=3;
        else if floor(p4_94/100)=14 then occ=4;
        else if floor(p4_94/100)=21 and floor(p4_94/10)^=211 then occ=5;
        else if floor(p4_94/100)=41 then occ=6;
        else if floor(p4_94/100)=51 then occ=7;
        else if floor(p4_94/100)=52 then occ=8;
        else if floor(p4_94/100)=53 then occ=9;
        else if floor(p4_94/100)=54 then occ=10;
        else if floor(p4_94/100)=55 then occ=11;
        else if floor(p4_94/100)=61 then occ=12;
        else if floor(p4_94/100)=62 and floor(p4_94/10)^=620 then occ=13;
        else if floor(p4_94/100)=71 then occ=14;      
        else if floor(p4_94/100)=72 then occ=15;
        else if floor(p4_94/100)=81 and floor(p4_94/10)^=812 then occ=16;
        else if floor(p4_94/100)=82 then occ=17;
        else if floor(p4_94/100)=83 then occ=18;
        else if floor(p4_94/100)=99 then occ=19;
        else if floor(p4_94/10)=620 then occ=20;
        else if floor(p4_94/10)=812 then occ=21;
        else if floor(p4_94/10)=211 then occ=22;

	
	*construct firm size variable;
	firmsize94=.;
	if p3a_94 in(1, 2) then firmsize94=p3b_94;
	if 3<=p3a_94<=8 then firmsize94=p3e_94;
			
        label
            municipi = 'municipio [municipality]'
            estrato = 'estrato [stratum]'
            ageb = 'Area Geoestadistica Basica'
            area = 'area de listado [listing area]'
            manzana = 'manzana [block]'
            distsem = 'distribucion semanal [weekly dist.]'
            control = 'control [??]'
            numviv = 'numero de vivienda [dwelling no.]'
            hogar = 'hogar [household]'
            hogarmud = 'hogar mudado [household moved]'
            per = 'periodo [period]'
            tipo = 'tipo de registros [type of record]'
            sexo = 'sexo [sex]'
            edad = 'edad [age]'
            estcivil = 'estado civil [marital status]'
            esco = 'escolaridad [schooling]'
            numentre = 'numero de entrevista [interview no.]'
            hourwage = 'hourly wage'
            mnthwage = 'monthly wage'
            mnthhour = 'monthly hours'
            male = 'male'
            married = 'married'
            age = 'age'
            esco1 = 'schooling recode'
            occ = 'occupation'
            subgrupo_cae94 = 'industry, 4-digit'
            rama = 'industry, 2-digit'
            city = 'city, 2-digit'
            state = 'state, 2-digit'
            factor = 'weight'
            fulltime = 'full-time worker'
            trimestre = 'quarter'
            periodo = 'year'
            eneu = 'eneu data indicator'
            firmsize94 = 'firmsize [1994]'
            work_lwk = 'worked last week'
            work_nopay = 'worked in non-paying job'
            temp_abs = 'temporary absence from work last wk'
            new_job = 'starting next job <4 wks'
            p2f_94 = 'months since last job'
            work_lyr = 'worked within last year'
            p3_94 = '# of current jobs'
            p3a_94 = 'type of employee'
            p3c_94 = 'type of coworkers'
            p3d_94 = 'type of contract'
            p6_1_94 = 'hrs worked, categories'
            p7_94 = 'paycheck type'
            include_3 = 'asked questions in part 3'
            include_3a = 'asked questions in part 3a'
            firmsize94 = 'firm size [1994]'
            p6_2_94 = 'hrs worked [regularly]'
            p6a_94 = 'hrs worked [categories]'
            p7d1_94 = 'aguinaldo [detailed]'
            p7d2_94 = 'vacaciones [detailed]'
            p7d3_94 = 'utilidades [detailed]'
            p7d4_94 = 'imss [detailed]'
            p7d5_94 = 'issste [detailed]'
            p7d6_94 = 'sar [detailed]'
            p7d7_94 = 'credito [detailed]'
            p7d8_94 = 'seguro medico [detailed]'
            p7d9_94 = 'otro [detailed]'
            ;
        
    eneu=1;
    periodo=&yr.;
    trimestre = &trimestre.;
        
    run;
    
    %mend harmonize19942004;

*********** Macro to list quarterly datasets *************;

/*
  listqtr: generate the whitespace-separated list of quarterly dataset names
  eneu<quarter><yr> for quarter = firstq through lastq (inclusive).

  Parameters:
    firstq - first quarter to list
    lastq  - last quarter to list

  The year suffix comes from the macro variable yr, which is NOT a parameter:
  it must already exist in the calling scope when the macro is invoked.

  The macro emits bare names with no semicolon, so it is presumably meant to be
  expanded inside another statement (e.g. a dataset list) - confirm at the
  call site. For that reason only macro comments are used inside the body.
*/
%macro listqtr(firstq,lastq);
    %* Keep the loop index local. Without this the loop would reuse and;
    %* overwrite a trimestre macro variable that exists in the caller;
    %local trimestre;
    %do trimestre = &firstq. %to &lastq.;
        eneu&trimestre.&yr.
    %end;
%mend listqtr;

************ Macro to list yearly datasets ***************;

/*
  listyr: generate the whitespace-separated list of yearly dataset names
  tmp.eneu<year> for year = firsty through lasty (inclusive).

  Parameters:
    firsty - first year to list
    lasty  - last year to list

  The macro emits bare names with no semicolon, so it is presumably meant to be
  expanded inside another statement (e.g. a dataset list) - confirm at the
  call site. For that reason only macro comments are used inside the body.
*/
%macro listyr(firsty,lasty);
    %* Keep the loop index local. Without this the loop would reuse and;
    %* overwrite a yr macro variable that exists in the caller;
    %local yr;
    %do yr = &firsty. %to &lasty.;
        tmp.eneu&yr.
    %end;
%mend listyr;

*************************************************************************************;
******************************** main program ***************************************;
*************************************************************************************;

******** invoke main macro ***********;
/* First and last calendar year handed to the readeneu macro below.
   NOTE(review): the header comment of this file says the data run from
   1-1987 to 2-2003, while lyr is set to 2004 and readeneu loops over
   quarters 1 to 4 of every year - confirm the intended end of the range. */
%let fyr=1987;
%let lyr=2004;

/* readeneu is declared with three parameters (firstyr, lastyr, ncities).
   Only the first two are supplied here, so ncities resolves to an empty
   value inside the macro. */
%readeneu(&fyr., &lyr.);

/* Clean-up via shell commands: remove the .dbf files whose names start with
   eneu or may from the tmp directory (readeneu copies and gunzips them
   there), then gzip the eneu SAS datasets found in the directory named by
   the working macro variable. The tmp and working macro variables are not
   defined in this part of the file - presumably set in housekeeping.sas. */
x "/bin/rm -f &tmp./eneu*.dbf";
x "/bin/rm -f &tmp./may*.dbf";
x "gzip -f &working./eneu*.sas7bdat";

